import pandas as pd
import plotly.graph_objects as go
import json
# Read the exported network; the code below uses its "nodes" and "links" lists.
# The context manager closes the file handle, and the explicit encoding keeps
# non-ASCII company names intact regardless of the platform default.
with open("network.json", encoding="utf-8") as network_file:
    network = json.load(network_file)

node_df = pd.DataFrame(network["nodes"])
nodes = sorted(node_df.name)

# Position of every name in the sorted list, computed once so each edge lookup
# is a dict hit rather than a linear list.index scan. Walking the list in
# reverse makes the first occurrence win for duplicated names, which is the
# position list.index would have returned. An unknown name raises KeyError.
node_position = {
    name: position for position, name in reversed(list(enumerate(nodes)))
}

edge_df = pd.DataFrame(network["links"])
edge_df["source_index"] = edge_df.source.apply(lambda s: node_position[s])
edge_df["target_index"] = edge_df.target.apply(lambda s: node_position[s])

# Edges of interest: only the links whose val is at least 50.
eoi = edge_df[edge_df.val >= 50]
eoi
| | source | target | val | source_index | target_index |
|---|---|---|---|---|---|
| 3 | The University of Auckland | The University of Auckland | 61 | 8585 | 8585 |
| 18 | PwC New Zealand | PwC New Zealand | 998 | 6805 | 6805 |
| 42 | Deloitte New Zealand | Deloitte New Zealand | 373 | 2316 | 2316 |
| 59 | PwC | PwC | 108 | 6754 | 6754 |
| 60 | PwC New Zealand | PwC | 62 | 6805 | 6754 |
| 76 | ASB Bank | ASB Bank | 66 | 189 | 189 |
| 130 | KPMG New Zealand | KPMG New Zealand | 814 | 4520 | 4520 |
| 221 | PwC | PwC New Zealand | 105 | 6754 | 6805 |
| 268 | Deloitte New Zealand | Deloitte | 98 | 2316 | 2268 |
| 356 | ANZ | ANZ | 72 | 153 | 153 |
| 418 | Deloitte | Deloitte | 231 | 2268 | 2268 |
| 437 | EY | EY | 848 | 2564 | 2564 |
| 627 | Bank of New Zealand | Bank of New Zealand | 62 | 1022 | 1022 |
| 642 | Fonterra | Fonterra | 72 | 3073 | 3073 |
# Sankey diagram of the company-to-company links selected in `eoi`.
# Every entry of `nodes` is offered as a label; links refer to nodes by their
# position in that list.
company_nodes = {"label": nodes}
company_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}

fig = go.Figure()
fig.add_trace(go.Sankey(node=company_nodes, link=company_links))
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ")
fig.show()
# Index the raw node records by name; if a name occurs more than once the
# last record wins.
node_lookup = {record["name"]: record for record in network["nodes"]}
# Spot-check one record.
node_lookup["PwC New Zealand"]
{'id': 'urn:li:fs_miniCompany:3255299',
'name': 'PwC New Zealand',
'geoLocationName': 'Auckland, New Zealand',
'locationName': 'Auckland, New Zealand',
'industry': 'Management Consulting',
'companyLogoUrl': 'https://media-exp1.licdn.com/dms/image/C4D0BAQGkhUlLEL8LTw/company-logo_',
'val': 2986}
def industry_of(company):
    """Return the "industry" field of the node record named *company*."""
    return node_lookup[company]["industry"]


# Annotate both endpoints of every edge with the industry of its node.
edge_df["source_industry"] = edge_df.source.apply(industry_of)
edge_df["target_industry"] = edge_df.target.apply(industry_of)
edge_df
| | source | target | val | source_index | target_index | source_industry | target_industry |
|---|---|---|---|---|---|---|---|
| 0 | PwC New Zealand | Diabetes UK | 1 | 6805 | 2394 | Management Consulting | Health, Wellness and Fitness |
| 1 | Tuakiri, New Zealand Access Federation | PwC New Zealand | 1 | 8809 | 6805 | None | Management Consulting |
| 2 | The University of Auckland | Tuakiri, New Zealand Access Federation | 1 | 8585 | 8809 | Higher Education | None |
| 3 | The University of Auckland | The University of Auckland | 61 | 8585 | 8585 | Higher Education | Higher Education |
| 4 | Orion Health | Wāhine Connect | 1 | 6213 | 9561 | Information Technology and Services | Hospital & Health Care |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 18123 | Bendall Advisory Ltd | Institute of Finacial Advisers | 1 | 1121 | 4228 | None | None |
| 18124 | NZ Olympic Committee | Bendall Advisory Ltd | 1 | 5678 | 1121 | None | None |
| 18125 | ANZ Private Equity | ANZ | 1 | 163 | 153 | None | Banking |
| 18126 | Weight Watchers Australia & New Zealand | Silverfin Capital Limited | 1 | 9355 | 7602 | Health, Wellness and Fitness | Investment Management |
| 18127 | KPMG New Zealand | Weight Watchers Australia & New Zealand | 1 | 4520 | 9355 | Management Consulting | Health, Wellness and Fitness |
18128 rows × 7 columns
# Total val for every (source industry, target industry) pair, largest first.
industry_pairs = edge_df.groupby(["source_industry", "target_industry"])
industry_totals = industry_pairs["val"].sum().reset_index()
industry_edge_df = industry_totals.sort_values(by="val", ascending=False)
industry_edge_df
| | source_industry | target_industry | val |
|---|---|---|---|
| 1772 | Management Consulting | Management Consulting | 3185 |
| 0 | Accounting | Accounting | 1468 |
| 57 | Accounting | Management Consulting | 606 |
| 1386 | Information Technology and Services | Information Technology and Services | 540 |
| 195 | Banking | Banking | 489 |
| ... | ... | ... | ... |
| 1388 | Information Technology and Services | International Affairs | 1 |
| 367 | Civil Engineering | Broadcast Media | 1 |
| 368 | Civil Engineering | Building Materials | 1 |
| 1383 | Information Technology and Services | Individual & Family Services | 1 |
| 2721 | Writing and Editing | Management Consulting | 1 |
2722 rows × 3 columns
# Alphabetical list of the distinct industries found on the nodes; nodes
# whose industry is None are left out.
industries = sorted(
    {
        record["industry"]
        for record in network["nodes"]
        if record["industry"] is not None
    }
)
industries
['Accounting', 'Airlines/Aviation', 'Alternative Dispute Resolution', 'Alternative Medicine', 'Apparel & Fashion', 'Architecture & Planning', 'Automotive', 'Aviation & Aerospace', 'Banking', 'Biotechnology', 'Broadcast Media', 'Building Materials', 'Business Supplies and Equipment', 'Capital Markets', 'Chemicals', 'Civic & Social Organization', 'Civil Engineering', 'Commercial Real Estate', 'Computer & Network Security', 'Computer Games', 'Computer Hardware', 'Computer Networking', 'Computer Software', 'Construction', 'Consumer Electronics', 'Consumer Goods', 'Consumer Services', 'Cosmetics', 'Dairy', 'Defense & Space', 'Design', 'E-Learning', 'Education Management', 'Electrical/Electronic Manufacturing', 'Entertainment', 'Environmental Services', 'Events Services', 'Executive Office', 'Facilities Services', 'Farming', 'Financial Services', 'Fine Art', 'Fishery', 'Food & Beverages', 'Food Production', 'Fund-Raising', 'Furniture', 'Gambling & Casinos', 'Glass, Ceramics & Concrete', 'Government Administration', 'Government Relations', 'Graphic Design', 'Health, Wellness and Fitness', 'Higher Education', 'Hospital & Health Care', 'Hospitality', 'Human Resources', 'Import and Export', 'Individual & Family Services', 'Industrial Automation', 'Information Services', 'Information Technology and Services', 'Insurance', 'International Affairs', 'International Trade and Development', 'Internet', 'Investment Banking', 'Investment Management', 'Law Enforcement', 'Law Practice', 'Legal Services', 'Legislative Office', 'Leisure, Travel & Tourism', 'Logistics and Supply Chain', 'Luxury Goods & Jewelry', 'Machinery', 'Management Consulting', 'Maritime', 'Market Research', 'Marketing and Advertising', 'Mechanical or Industrial Engineering', 'Media Production', 'Medical Devices', 'Medical Practice', 'Mental Health Care', 'Military', 'Mining & Metals', 'Mobile Games', 'Motion Pictures and Film', 'Museums and Institutions', 'Music', 'Newspapers', 'Nonprofit Organization Management', 
'Oil & Energy', 'Online Media', 'Outsourcing/Offshoring', 'Package/Freight Delivery', 'Packaging and Containers', 'Paper & Forest Products', 'Performing Arts', 'Pharmaceuticals', 'Philanthropy', 'Photography', 'Plastics', 'Political Organization', 'Primary/Secondary Education', 'Printing', 'Professional Training & Coaching', 'Program Development', 'Public Policy', 'Public Relations and Communications', 'Public Safety', 'Publishing', 'Railroad Manufacture', 'Real Estate', 'Recreational Facilities and Services', 'Religious Institutions', 'Renewables & Environment', 'Research', 'Restaurants', 'Retail', 'Security and Investigations', 'Semiconductors', 'Sporting Goods', 'Sports', 'Staffing and Recruiting', 'Supermarkets', 'Telecommunications', 'Textiles', 'Think Tanks', 'Tobacco', 'Translation and Localization', 'Transportation/Trucking/Railroad', 'Utilities', 'Venture Capital & Private Equity', 'Veterinary', 'Wholesale', 'Wine and Spirits', 'Wireless', 'Writing and Editing']
# Position of each industry in `industries`, built once so every row lookup is
# a dict hit instead of a linear list.index scan. `industries` holds unique
# values, so each name maps to exactly one position. An unknown industry
# raises KeyError.
industry_position = {
    name: position for position, name in enumerate(industries)
}

industry_edge_df["source_index"] = industry_edge_df.source_industry.apply(
    lambda i: industry_position[i]
)
industry_edge_df["target_index"] = industry_edge_df.target_industry.apply(
    lambda i: industry_position[i]
)
industry_edge_df.head(50)
| | source_industry | target_industry | val | source_index | target_index |
|---|---|---|---|---|---|
| 1772 | Management Consulting | Management Consulting | 3185 | 76 | 76 |
| 0 | Accounting | Accounting | 1468 | 0 | 0 |
| 57 | Accounting | Management Consulting | 606 | 0 | 76 |
| 1386 | Information Technology and Services | Information Technology and Services | 540 | 61 | 61 |
| 195 | Banking | Banking | 489 | 8 | 8 |
| 1704 | Management Consulting | Accounting | 371 | 76 | 0 |
| 1164 | Higher Education | Management Consulting | 322 | 53 | 76 |
| 1396 | Information Technology and Services | Management Consulting | 308 | 61 | 76 |
| 1016 | Government Administration | Government Administration | 260 | 49 | 49 |
| 852 | Financial Services | Financial Services | 246 | 40 | 40 |
| 871 | Financial Services | Management Consulting | 238 | 40 | 76 |
| 1147 | Higher Education | Higher Education | 229 | 53 | 53 |
| 1710 | Management Consulting | Banking | 221 | 76 | 8 |
| 1757 | Management Consulting | Information Technology and Services | 203 | 76 | 61 |
| 1035 | Government Administration | Management Consulting | 181 | 49 | 76 |
| 2599 | Telecommunications | Telecommunications | 168 | 127 | 127 |
| 230 | Banking | Management Consulting | 166 | 8 | 76 |
| 1740 | Management Consulting | Financial Services | 160 | 76 | 40 |
| 1113 | Higher Education | Accounting | 138 | 53 | 0 |
| 1749 | Management Consulting | Higher Education | 136 | 76 | 53 |
| 433 | Computer Software | Computer Software | 133 | 22 | 22 |
| 1601 | Law Practice | Law Practice | 129 | 69 | 69 |
| 1746 | Management Consulting | Government Administration | 129 | 76 | 49 |
| 1449 | Insurance | Insurance | 119 | 62 | 62 |
| 2422 | Retail | Management Consulting | 112 | 120 | 76 |
| 833 | Financial Services | Accounting | 101 | 40 | 0 |
| 1347 | Information Technology and Services | Accounting | 101 | 61 | 0 |
| 2672 | Utilities | Utilities | 98 | 133 | 133 |
| 2436 | Retail | Retail | 96 | 120 | 120 |
| 37 | Accounting | Higher Education | 93 | 0 | 53 |
| 675 | Education Management | Management Consulting | 90 | 32 | 76 |
| 193 | Banking | Accounting | 85 | 8 | 0 |
| 583 | Dairy | Dairy | 84 | 28 | 28 |
| 5 | Accounting | Banking | 80 | 0 | 8 |
| 31 | Accounting | Financial Services | 79 | 0 | 40 |
| 1722 | Management Consulting | Computer Software | 76 | 76 | 22 |
| 1603 | Law Practice | Management Consulting | 74 | 69 | 76 |
| 44 | Accounting | Information Technology and Services | 73 | 0 | 61 |
| 995 | Government Administration | Accounting | 66 | 49 | 0 |
| 1758 | Management Consulting | Insurance | 64 | 76 | 62 |
| 2027 | Nonprofit Organization Management | Management Consulting | 64 | 92 | 76 |
| 1765 | Management Consulting | Law Practice | 63 | 76 | 69 |
| 918 | Food & Beverages | Food & Beverages | 63 | 43 | 43 |
| 836 | Financial Services | Banking | 61 | 40 | 8 |
| 2378 | Retail | Accounting | 60 | 120 | 0 |
| 1209 | Hospital & Health Care | Hospital & Health Care | 59 | 54 | 54 |
| 1454 | Insurance | Management Consulting | 59 | 62 | 76 |
| 2582 | Telecommunications | Management Consulting | 58 | 127 | 76 |
| 1869 | Marketing and Advertising | Marketing and Advertising | 56 | 79 | 79 |
| 211 | Banking | Financial Services | 54 | 8 | 40 |
# Keep only the industry-to-industry links whose val is strictly above 100.
eoi = industry_edge_df[industry_edge_df.val > 100]

# Sankey diagram of those links; labels come from `industries` and the links
# refer to industries by their position in that list.
industry_nodes = {"label": industries}
industry_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}

fig = go.Figure()
fig.add_trace(go.Sankey(node=industry_nodes, link=industry_links))
fig.update_layout(title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ")
fig.show()